library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.1.8
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(ggmap)
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
library(tidyr)
library(lubridate)
# Base URL of the JHU CSSE COVID-19 time-series folder on GitHub.
url_in <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/"

file_names <- c("time_series_covid19_confirmed_US.csv",
                "time_series_covid19_confirmed_global.csv",
                "time_series_covid19_deaths_US.csv",
                "time_series_covid19_deaths_global.csv")

urls <- str_c(url_in, file_names)

# Read each file into the variable that matches its content. The indices
# follow the order of `file_names` above (confirmed US, confirmed global,
# deaths US, deaths global); previously the US confirmed file was loaded as
# `global_cases`, the global confirmed file as `global_deaths`, and so on.
US_cases <- read_csv(urls[1])
global_cases <- read_csv(urls[2])
US_deaths <- read_csv(urls[3])
global_deaths <- read_csv(urls[4])
library(tidyr)

# The US-layout files carry six leading identifier columns and, after the
# Province_State / Country_Region / Lat / Long_ columns, one more label column
# (position 11) that the analysis does not need. Drop them in one step.
US_cases <- US_cases[, -c(1:6, 11)]
US_deaths <- US_deaths[, -c(1:6, 11)]

# NOTE(review): the US deaths file is expected to carry an extra Population
# column that is not a date; drop it if present so it is not reshaped as one.
US_deaths <- US_deaths %>%
  select(-any_of("Population"))

# The reshaping and plotting code further down refers to `global_cases` by the
# US-style column names (Province_State, Country_Region, Long_) and to
# `US_deaths` by the global-style names (`Province/State`, `Country/Region`,
# Long). Rename here so those references keep working on the correct data.
global_cases <- global_cases %>%
  rename(Province_State = `Province/State`,
         Country_Region = `Country/Region`,
         Long_ = Long)
US_deaths <- US_deaths %>%
  rename(`Province/State` = Province_State,
         `Country/Region` = Country_Region,
         Long = Long_)

After importing the data sets, several modifications were made to each one. In particular, the format in which the date information was presented was unusable, so each data set had to undergo a pivot_longer operation to reshape it. Several columns were also removed, since I only needed the latitude, longitude, date, and case columns for the analysis that I had planned. The date information also had to be parsed so that ggplot2 would recognize the values as dates and not simple strings.


# Reshape a wide table (one column per reporting date) into long form with one
# row per location and date, and parse the date strings into Date values.
#
# Date columns are selected by their "m/d/yy" header pattern instead of by
# excluding a fixed list of id columns. Any other column still present in the
# table is therefore kept as an id column rather than being pivoted into
# `date`, where mdy() would turn its name into NA.
#
# wide: a data frame whose date columns are named like "1/22/20".
# Returns the long table with a Date column `date` and a count column `cases`.
pivot_dates_longer <- function(wide) {
  wide %>%
    pivot_longer(cols = matches("^\\d{1,2}/\\d{1,2}/\\d{2,4}$"),
                 names_to = "date",
                 values_to = "cases") %>%
    mutate(date = mdy(date))
}

global_cases <- pivot_dates_longer(global_cases)
global_deaths <- pivot_dates_longer(global_deaths)
US_cases <- pivot_dates_longer(US_cases)
US_deaths <- pivot_dates_longer(US_deaths)

When I originally tried to plot the latitude and longitude data for the four data sets in this project, I had numerous issues with outliers affecting the plots I had created. To fix this, I used a technique to remove the outlier data points from my sample. Please see https://cran.r-project.org/web/packages/ggmap/readme/README.html for more information on where these formulas came from.

# Compute the quartiles, the interquartile range and the outlier fences
# (Q1 - k * IQR, Q3 + k * IQR) of a numeric vector.
#
# x: numeric vector of counts.
# k: fence multiplier; 1.5 is the value used throughout this analysis.
# Missing values are ignored so a single NA does not abort the computation.
iqr_fences <- function(x, k = 1.5) {
  quartiles <- quantile(x, probs = c(.25, .75), na.rm = TRUE)
  spread <- IQR(x, na.rm = TRUE)
  list(quartiles = quartiles,
       iqr = spread,
       low = quartiles[1] - k * spread,
       up = quartiles[2] + k * spread)
}

# Keep only the rows whose `cases` value lies strictly between the two fences.
# Rows with a missing `cases` value are dropped, as subset() would do.
drop_outliers <- function(df, low, up) {
  df[which(df$cases > low & df$cases < up), ]
}

fences_US_cases <- iqr_fences(US_cases$cases)
Q_US_cases <- fences_US_cases$quartiles
iqr_US_cases <- fences_US_cases$iqr
up_US_cases <- fences_US_cases$up # Upper Range
low_US_cases <- fences_US_cases$low # Lower Range
US_cases_eliminated <- drop_outliers(US_cases, low_US_cases, up_US_cases)

fences_global_cases <- iqr_fences(global_cases$cases)
Q_global_cases <- fences_global_cases$quartiles
iqr_global_cases <- fences_global_cases$iqr
up_global_cases <- fences_global_cases$up # Upper Range
low_global_cases <- fences_global_cases$low # Lower Range
global_cases_eliminated <- drop_outliers(global_cases, low_global_cases,
                                         up_global_cases)

fences_US_deaths <- iqr_fences(US_deaths$cases)
Q_US_deaths <- fences_US_deaths$quartiles
iqr_US_deaths <- fences_US_deaths$iqr
up_US_deaths <- fences_US_deaths$up # Upper Range
low_US_deaths <- fences_US_deaths$low # Lower Range
US_deaths_eliminated <- drop_outliers(US_deaths, low_US_deaths, up_US_deaths)

fences_global_deaths <- iqr_fences(global_deaths$cases)
Q_global_deaths <- fences_global_deaths$quartiles
iqr_global_deaths <- fences_global_deaths$iqr
up_global_deaths <- fences_global_deaths$up # Upper Range
low_global_deaths <- fences_global_deaths$low # Lower Range
global_deaths_eliminated <- drop_outliers(global_deaths, low_global_deaths,
                                          up_global_deaths)

The following section shows the plots for the number of cases or deaths per day over the course of the entire data set for this project. I was experimenting with a heat map visualization to show the time series progression of the pandemic, but I could not get the plot to work in the amount of time allotted for this project. Overall, this plot is not particularly useful: the density of the individual points is too great and results in an essentially uniform color gradient with no distinguishable information. If I had more time, I would look into subsetting the data sets to show either the monthly progression or the plot for only one state at a time. These changes would allow more use to be obtained from the otherwise lackluster plots I have created for this assignment.


# Scatter plot of `cases` against `date`, with the points coloured by `cases`.
#
# The colour aesthetic refers to the column by its bare name. Writing
# `df$cases` inside aes() bypasses the data argument, is discouraged by
# ggplot2, and leaks the data frame name into the legend title.
#
# df: a long table with `date` and `cases` columns.
plot_counts_over_time <- function(df) {
  ggplot(df, aes(x = date, y = cases, color = cases)) +
    geom_point()
}

plot_counts_over_time(US_cases_eliminated)

plot_counts_over_time(global_cases_eliminated)

plot_counts_over_time(US_deaths_eliminated)

plot_counts_over_time(global_deaths_eliminated)


# Fetch a hybrid base map for "United States" at zoom level 4 and overlay the
# points in US_cases_eliminated, coloured from green (low) to red (high).
base_map_us <- get_map(location = "United States", maptype = "hybrid", zoom = 4)
## ℹ <https://maps.googleapis.com/maps/api/staticmap?center=United%20States&zoom=4&size=640x640&scale=2&maptype=hybrid&language=en-EN&key=xxx-yct8A7mGaYook8HcM>
## ℹ <https://maps.googleapis.com/maps/api/geocode/json?address=United+States&key=xxx-yct8A7mGaYook8HcM>
ggmap(base_map_us) +
  geom_point(mapping = aes(x = Long_, y = Lat, colour = cases),
             data = US_cases_eliminated,
             alpha = 0.5, size = 0.02) +
  scale_colour_gradient(low = "green", high = "red")

# Fetch a hybrid base map for "Congo" at zoom level 1 and overlay the points
# in global_cases_eliminated, coloured from green (low) to red (high).
base_map_earth <- get_map(location = "Congo", maptype = "hybrid", zoom = 1)
## ℹ <https://maps.googleapis.com/maps/api/staticmap?center=Congo&zoom=1&size=640x640&scale=2&maptype=hybrid&language=en-EN&key=xxx-yct8A7mGaYook8HcM>
## ℹ <https://maps.googleapis.com/maps/api/geocode/json?address=Congo&key=xxx-yct8A7mGaYook8HcM>
ggmap(base_map_earth) +
  geom_point(mapping = aes(x = Long_, y = Lat, colour = cases),
             data = global_cases_eliminated,
             alpha = 0.5, size = 0.01) +
  scale_colour_gradient(low = "green", high = "red")

# Fetch a hybrid base map for "United States" at zoom level 4 and overlay the
# points in US_deaths_eliminated, coloured from red (low) to black (high).
base_map_us_death <- get_map(location = "United States", maptype = "hybrid",
                             zoom = 4)
## ℹ <https://maps.googleapis.com/maps/api/staticmap?center=United%20States&zoom=4&size=640x640&scale=2&maptype=hybrid&language=en-EN&key=xxx-yct8A7mGaYook8HcM>
## ℹ <https://maps.googleapis.com/maps/api/geocode/json?address=United+States&key=xxx-yct8A7mGaYook8HcM>
ggmap(base_map_us_death) +
  geom_point(mapping = aes(x = Long, y = Lat, colour = cases),
             data = US_deaths_eliminated,
             alpha = 0.5, size = 0.01) +
  scale_colour_gradient(low = "red", high = "black")

# Fetch a hybrid base map for "Congo" at zoom level 1 and overlay the points
# in global_deaths_eliminated, coloured from red (low) to black (high).
base_map_earth_death <- get_map(location = "Congo", maptype = "hybrid",
                                zoom = 1)
## ℹ <https://maps.googleapis.com/maps/api/staticmap?center=Congo&zoom=1&size=640x640&scale=2&maptype=hybrid&language=en-EN&key=xxx-yct8A7mGaYook8HcM>
## ℹ <https://maps.googleapis.com/maps/api/geocode/json?address=Congo&key=xxx-yct8A7mGaYook8HcM>
ggmap(base_map_earth_death) +
  geom_point(mapping = aes(x = Long, y = Lat, colour = cases),
             data = global_deaths_eliminated,
             alpha = 0.5, size = 0.01) +
  scale_colour_gradient(low = "red", high = "black")

The maps above were created to try to illustrate the density of infections and deaths across the country. Overall they are adequate, but I believe they could use a little more refinement. I am not sure that the red-to-black gradient reads particularly well on the death map plots, and the labeling is a bit sloppy. I have seen other maps that use county or state information and show the total number of infections in a general area, and I believe this would be a better representation of the information. As with the plots above, I think the density of information is too high for a person to easily gain any significant information from the graph. I would like to try tweaking the transparency of the individual data points in a future update to this graph.


# Print the R version, platform and attached/loaded package versions used for
# this run.
utils::sessionInfo()
## R version 4.2.1 (2022-06-23 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggmap_3.0.1     lubridate_1.9.2 forcats_1.0.0   stringr_1.5.0  
##  [5] dplyr_1.1.0     purrr_1.0.1     readr_2.1.4     tidyr_1.3.0    
##  [9] tibble_3.1.8    ggplot2_3.4.1   tidyverse_2.0.0
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.0    xfun_0.37           bslib_0.4.2        
##  [4] lattice_0.20-45     colorspace_2.1-0    vctrs_0.5.2        
##  [7] generics_0.1.3      htmltools_0.5.4     yaml_2.3.7         
## [10] utf8_1.2.3          rlang_1.0.6         jquerylib_0.1.4    
## [13] pillar_1.8.1        glue_1.6.2          withr_2.5.0        
## [16] bit64_4.0.5         sp_1.6-0            jpeg_0.1-10        
## [19] lifecycle_1.0.3     plyr_1.8.8          munsell_0.5.0      
## [22] gtable_0.3.1        RgoogleMaps_1.4.5.3 evaluate_0.20      
## [25] labeling_0.4.2      knitr_1.42          tzdb_0.3.0         
## [28] fastmap_1.1.1       curl_5.0.0          parallel_4.2.1     
## [31] fansi_1.0.4         highr_0.10          Rcpp_1.0.10        
## [34] scales_1.2.1        cachem_1.0.7        vroom_1.6.1        
## [37] jsonlite_1.8.4      farver_2.1.1        bit_4.0.5          
## [40] hms_1.1.2           png_0.1-8           digest_0.6.31      
## [43] stringi_1.7.12      grid_4.2.1          cli_3.6.0          
## [46] tools_4.2.1         bitops_1.0-7        magrittr_2.0.3     
## [49] sass_0.4.5          crayon_1.5.2        pkgconfig_2.0.3    
## [52] ellipsis_0.3.2      timechange_0.2.0    rmarkdown_2.20     
## [55] httr_1.4.5          rstudioapi_0.14     R6_2.5.1           
## [58] compiler_4.2.1